Goal: count the SNP differences between LTR and LTNR on a per-gene basis.
library(tidyverse)
library(ggrepel)
Get the VCF header (the `#CHROM` line that names the columns and samples).
# Pull the column-header line out of the gzipped VCF via zgrep.
# The pattern is anchored to "^#CHROM" so that only the header line itself is
# returned; an unanchored '#C' would also match any "##C..." meta line or any
# record that happens to contain "#C", and the wrong line could then be used
# as the column names.
vcf.header <- system("zgrep '^#CHROM' ../input/LT.vcf.gz", intern = TRUE)
vcf.header
[1] "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t38LTR\t42LTR\t42LTRR\t43LTR\t43LTRR\t49LTWR\t49LTWRR\t95LTWR\t95LTWRR\t99LTWR"
# Turn the raw header line into a character vector of column names:
# remove the first "#", split on tabs, and unwrap the first element of the
# list that str_split() returns.
vcf.header <- str_split(
  str_replace(vcf.header, "#", ""),
  pattern = "\t"
)[[1]]
vcf.header
[1] "CHROM" "POS" "ID" "REF" "ALT" "QUAL" "FILTER" "INFO"
[9] "FORMAT" "38LTR" "42LTR" "42LTRR" "43LTR" "43LTRR" "49LTWR" "49LTWRR"
[17] "95LTWR" "95LTWRR" "99LTWR"
Read the SNP data, using the header parsed above as the column names.
# Read the VCF body into a tibble. comment = "#" makes readr ignore the "##"
# meta lines and the "#CHROM" header, so the column names are supplied from
# vcf.header instead. Entries of "", "NA" and "." are read as NA.
# Column types are declared explicitly (they match what readr guessed for this
# file) so the result does not depend on type guessing, and the progress bar
# is switched off so it does not flood the rendered output.
snps <- read_tsv(
  "../input/LT.vcf.gz",
  na = c("", "NA", "."),
  comment = "#",
  col_names = vcf.header,
  col_types = cols(
    .default = col_character(),
    POS = col_integer(),
    QUAL = col_double()
  ),
  progress = FALSE
) %>%
  select(-ID, -FILTER) # these are empty columns
Parsed with column specification:
cols(
CHROM = col_character(),
POS = col_integer(),
ID = col_character(),
REF = col_character(),
ALT = col_character(),
QUAL = col_double(),
FILTER = col_character(),
INFO = col_character(),
FORMAT = col_character(),
`38LTR` = col_character(),
`42LTR` = col_character(),
`42LTRR` = col_character(),
`43LTR` = col_character(),
`43LTRR` = col_character(),
`49LTWR` = col_character(),
`49LTWRR` = col_character(),
`95LTWR` = col_character(),
`95LTWRR` = col_character(),
`99LTWR` = col_character()
)
|==== | 5% 23 MB
|==== | 6% 24 MB
|==== | 6% 24 MB
|==== | 6% 24 MB
|==== | 6% 25 MB
|==== | 6% 25 MB
|==== | 6% 25 MB
|==== | 6% 26 MB
|===== | 6% 26 MB
|===== | 6% 27 MB
|===== | 6% 27 MB
|===== | 6% 27 MB
|===== | 7% 28 MB
|===== | 7% 28 MB
|===== | 7% 29 MB
|===== | 7% 29 MB
|===== | 7% 29 MB
|===== | 7% 30 MB
|===== | 7% 30 MB
|===== | 7% 30 MB
|===== | 7% 31 MB
|===== | 7% 31 MB
|====== | 8% 32 MB
|====== | 8% 32 MB
|====== | 8% 32 MB
|====== | 8% 33 MB
|====== | 8% 33 MB
|====== | 8% 34 MB
|====== | 8% 34 MB
|====== | 8% 34 MB
|====== | 8% 35 MB
|====== | 8% 35 MB
|====== | 9% 36 MB
|====== | 9% 36 MB
|====== | 9% 36 MB
|====== | 9% 37 MB
|======= | 9% 37 MB
|======= | 9% 37 MB
|======= | 9% 38 MB
|======= | 9% 38 MB
|======= | 9% 39 MB
|======= | 9% 39 MB
|======= | 9% 39 MB
|======= | 10% 40 MB
|======= | 10% 40 MB
|======= | 10% 40 MB
|======= | 10% 41 MB
|======= | 10% 41 MB
|======= | 10% 42 MB
|======= | 10% 42 MB
|======== | 10% 42 MB
|======== | 10% 43 MB
|======== | 10% 43 MB
|======== | 11% 44 MB
|======== | 11% 44 MB
|======== | 11% 44 MB
|======== | 11% 45 MB
|======== | 11% 45 MB
|======== | 11% 46 MB
|======== | 11% 46 MB
|======== | 11% 46 MB
|======== | 11% 47 MB
|======== | 11% 47 MB
|========= | 12% 47 MB
|========= | 12% 48 MB
|========= | 12% 48 MB
|========= | 12% 49 MB
|========= | 12% 49 MB
|========= | 12% 49 MB
|========= | 12% 50 MB
|========= | 12% 50 MB
|========= | 12% 51 MB
|========= | 12% 51 MB
|========= | 12% 51 MB
|========= | 13% 52 MB
|========= | 13% 52 MB
|========= | 13% 53 MB
|========== | 13% 53 MB
|========== | 13% 53 MB
|========== | 13% 54 MB
|========== | 13% 54 MB
|========== | 13% 54 MB
|========== | 13% 55 MB
|========== | 13% 55 MB
|========== | 14% 56 MB
|========== | 14% 56 MB
|========== | 14% 56 MB
|========== | 14% 57 MB
|========== | 14% 57 MB
|========== | 14% 57 MB
|========== | 14% 58 MB
|=========== | 14% 58 MB
|=========== | 14% 59 MB
|=========== | 14% 59 MB
|=========== | 15% 59 MB
|=========== | 15% 60 MB
|=========== | 15% 60 MB
|=========== | 15% 61 MB
|=========== | 15% 61 MB
|=========== | 15% 61 MB
|=========== | 15% 62 MB
|=========== | 15% 62 MB
|=========== | 15% 63 MB
|=========== | 15% 63 MB
|=========== | 15% 63 MB
|============ | 16% 64 MB
|============ | 16% 64 MB
|============ | 16% 64 MB
|============ | 16% 65 MB
|============ | 16% 65 MB
|============ | 16% 66 MB
|============ | 16% 66 MB
|============ | 16% 66 MB
|============ | 16% 67 MB
|============ | 16% 67 MB
|============ | 17% 68 MB
|============ | 17% 68 MB
|============ | 17% 68 MB
|============= | 17% 69 MB
|============= | 17% 69 MB
|============= | 17% 69 MB
|============= | 17% 70 MB
|============= | 17% 70 MB
|============= | 17% 71 MB
|============= | 17% 71 MB
|============= | 18% 71 MB
|============= | 18% 72 MB
|============= | 18% 72 MB
|============= | 18% 73 MB
|============= | 18% 73 MB
|============= | 18% 73 MB
|============= | 18% 74 MB
|============== | 18% 74 MB
|============== | 18% 74 MB
|============== | 18% 75 MB
|============== | 18% 75 MB
|============== | 19% 76 MB
|============== | 19% 76 MB
|============== | 19% 76 MB
|============== | 19% 77 MB
|============== | 19% 77 MB
|============== | 19% 78 MB
|============== | 19% 78 MB
|============== | 19% 78 MB
|============== | 19% 79 MB
|============== | 19% 79 MB
|=============== | 20% 79 MB
|=============== | 20% 80 MB
|=============== | 20% 80 MB
|=============== | 20% 81 MB
|=============== | 20% 81 MB
|=============== | 20% 81 MB
|=============== | 20% 82 MB
|=============== | 20% 82 MB
|=============== | 20% 83 MB
|=============== | 20% 83 MB
|=============== | 21% 83 MB
|=============== | 21% 84 MB
|=============== | 21% 84 MB
|=============== | 21% 84 MB
|================ | 21% 85 MB
|================ | 21% 85 MB
|================ | 21% 86 MB
|================ | 21% 86 MB
|================ | 21% 86 MB
|================ | 21% 87 MB
|================ | 21% 87 MB
|================ | 22% 87 MB
|================ | 22% 88 MB
|================ | 22% 88 MB
|================ | 22% 89 MB
|================ | 22% 89 MB
|================ | 22% 89 MB
|================ | 22% 90 MB
|================= | 22% 90 MB
|================= | 22% 91 MB
|================= | 22% 91 MB
|================= | 23% 91 MB
|================= | 23% 92 MB
|================= | 23% 92 MB
|================= | 23% 92 MB
|================= | 23% 93 MB
|================= | 23% 93 MB
|================= | 23% 94 MB
|================= | 23% 94 MB
|================= | 23% 94 MB
|================= | 23% 95 MB
|================= | 23% 95 MB
|================== | 24% 95 MB
|================== | 24% 96 MB
|================== | 24% 96 MB
|================== | 24% 97 MB
|================== | 24% 97 MB
|================== | 24% 97 MB
|================== | 24% 98 MB
|================== | 24% 98 MB
|================== | 24% 99 MB
|================== | 24% 99 MB
|================== | 25% 99 MB
|================== | 25% 100 MB
|================== | 25% 100 MB
|================== | 25% 100 MB
|=================== | 25% 101 MB
|=================== | 25% 101 MB
|=================== | 25% 102 MB
|=================== | 25% 102 MB
|=================== | 25% 102 MB
|=================== | 25% 103 MB
|=================== | 25% 103 MB
|=================== | 26% 104 MB
|=================== | 26% 104 MB
|=================== | 26% 104 MB
|=================== | 26% 105 MB
|=================== | 26% 105 MB
|=================== | 26% 105 MB
|=================== | 26% 106 MB
|==================== | 26% 106 MB
|==================== | 26% 107 MB
|==================== | 26% 107 MB
|==================== | 27% 107 MB
|==================== | 27% 108 MB
|==================== | 27% 108 MB
|==================== | 27% 109 MB
|==================== | 27% 109 MB
|==================== | 27% 109 MB
|==================== | 27% 110 MB
|==================== | 27% 110 MB
|==================== | 27% 110 MB
|==================== | 27% 111 MB
|==================== | 27% 111 MB
|===================== | 28% 112 MB
|===================== | 28% 112 MB
|===================== | 28% 112 MB
|===================== | 28% 113 MB
|===================== | 28% 113 MB
|===================== | 28% 114 MB
|===================== | 28% 114 MB
|===================== | 28% 114 MB
|===================== | 28% 115 MB
|===================== | 28% 115 MB
|===================== | 29% 116 MB
|===================== | 29% 116 MB
|===================== | 29% 116 MB
|====================== | 29% 117 MB
|====================== | 29% 117 MB
|====================== | 29% 117 MB
|====================== | 29% 118 MB
|====================== | 29% 118 MB
|====================== | 29% 119 MB
|====================== | 29% 119 MB
|====================== | 30% 119 MB
|====================== | 30% 120 MB
|====================== | 30% 120 MB
|====================== | 30% 121 MB
|====================== | 30% 121 MB
|====================== | 30% 121 MB
|====================== | 30% 122 MB
|======================= | 30% 122 MB
|======================= | 30% 122 MB
|======================= | 30% 123 MB
|======================= | 31% 123 MB
|======================= | 31% 124 MB
|======================= | 31% 124 MB
|======================= | 31% 124 MB
|======================= | 31% 125 MB
|======================= | 31% 125 MB
|======================= | 31% 126 MB
|======================= | 31% 126 MB
|======================= | 31% 126 MB
|======================= | 31% 127 MB
|======================= | 31% 127 MB
|======================== | 32% 128 MB
|======================== | 32% 128 MB
|======================== | 32% 128 MB
|======================== | 32% 129 MB
|======================== | 32% 129 MB
|======================== | 32% 129 MB
|======================== | 32% 130 MB
|======================== | 32% 130 MB
|======================== | 32% 131 MB
|======================== | 32% 131 MB
|======================== | 33% 131 MB
|======================== | 33% 132 MB
|======================== | 33% 132 MB
|========================= | 33% 133 MB
|========================= | 33% 133 MB
|========================= | 33% 133 MB
|========================= | 33% 134 MB
|========================= | 33% 134 MB
|========================= | 33% 135 MB
|========================= | 33% 135 MB
|========================= | 34% 135 MB
|========================= | 34% 136 MB
|========================= | 34% 136 MB
|========================= | 34% 137 MB
|========================= | 34% 137 MB
|========================= | 34% 137 MB
|========================= | 34% 138 MB
|========================== | 34% 138 MB
|========================== | 34% 139 MB
|========================== | 34% 139 MB
|========================== | 35% 139 MB
|========================== | 35% 140 MB
|========================== | 35% 140 MB
|========================== | 35% 141 MB
|========================== | 35% 141 MB
|========================== | 35% 141 MB
|========================== | 35% 142 MB
|========================== | 35% 142 MB
|========================== | 35% 142 MB
|========================== | 35% 143 MB
|=========================== | 36% 143 MB
|=========================== | 36% 144 MB
|=========================== | 36% 144 MB
|=========================== | 36% 144 MB
|=========================== | 36% 145 MB
|=========================== | 36% 145 MB
|=========================== | 36% 146 MB
|=========================== | 36% 146 MB
|=========================== | 36% 146 MB
|=========================== | 36% 147 MB
|=========================== | 37% 147 MB
|=========================== | 37% 148 MB
|=========================== | 37% 148 MB
|=========================== | 37% 148 MB
|============================ | 37% 149 MB
|============================ | 37% 149 MB
|============================ | 37% 150 MB
|============================ | 37% 150 MB
|============================ | 37% 150 MB
|============================ | 37% 151 MB
|============================ | 37% 151 MB
|============================ | 38% 151 MB
|============================ | 38% 152 MB
|============================ | 38% 152 MB
|============================ | 38% 153 MB
|============================ | 38% 153 MB
|============================ | 38% 153 MB
|============================= | 38% 154 MB
|============================= | 38% 154 MB
|============================= | 38% 155 MB
|============================= | 38% 155 MB
|============================= | 39% 155 MB
|============================= | 39% 156 MB
|============================= | 39% 156 MB
|============================= | 39% 157 MB
|============================= | 39% 157 MB
|============================= | 39% 157 MB
|============================= | 39% 158 MB
|============================= | 39% 158 MB
|============================= | 39% 159 MB
|============================= | 39% 159 MB
|============================== | 40% 159 MB
|============================== | 40% 160 MB
|============================== | 40% 160 MB
|============================== | 40% 160 MB
|============================== | 40% 161 MB
|============================== | 40% 161 MB
|============================== | 40% 162 MB
|============================== | 40% 162 MB
|============================== | 40% 162 MB
|============================== | 40% 163 MB
|============================== | 41% 163 MB
|============================== | 41% 164 MB
|============================== | 41% 164 MB
|=============================== | 41% 164 MB
|=============================== | 41% 165 MB
|=============================== | 41% 165 MB
|=============================== | 41% 166 MB
|=============================== | 41% 166 MB
|=============================== | 41% 166 MB
|=============================== | 41% 167 MB
|=============================== | 42% 167 MB
|=============================== | 42% 168 MB
|=============================== | 42% 168 MB
|=============================== | 42% 168 MB
|=============================== | 42% 169 MB
|=============================== | 42% 169 MB
|================================ | 42% 170 MB
|================================ | 42% 170 MB
|================================ | 42% 171 MB
|================================ | 42% 171 MB
|================================ | 43% 171 MB
|================================ | 43% 172 MB
|================================ | 43% 172 MB
|================================ | 43% 173 MB
|================================ | 43% 173 MB
|================================ | 43% 173 MB
|================================ | 43% 174 MB
|================================ | 43% 174 MB
|================================ | 43% 175 MB
|================================= | 44% 175 MB
|================================= | 44% 175 MB
|================================= | 44% 176 MB
|================================= | 44% 176 MB
|================================= | 44% 177 MB
|================================= | 44% 177 MB
|================================= | 44% 177 MB
|================================= | 44% 178 MB
|================================= | 44% 178 MB
|================================= | 44% 179 MB
|================================= | 44% 179 MB
|================================= | 45% 179 MB
|================================= | 45% 180 MB
|================================= | 45% 180 MB
|================================== | 45% 181 MB
|================================== | 45% 181 MB
|================================== | 45% 181 MB
|================================== | 45% 182 MB
|================================== | 45% 182 MB
|================================== | 45% 183 MB
|================================== | 46% 183 MB
|================================== | 46% 183 MB
|================================== | 46% 184 MB
|================================== | 46% 184 MB
|================================== | 46% 185 MB
|================================== | 46% 185 MB
|================================== | 46% 185 MB
|=================================== | 46% 186 MB
|=================================== | 46% 186 MB
|=================================== | 46% 187 MB
|=================================== | 46% 187 MB
|=================================== | 47% 187 MB
|=================================== | 47% 188 MB
|=================================== | 47% 188 MB
|=================================== | 47% 188 MB
|=================================== | 47% 189 MB
|=================================== | 47% 189 MB
|=================================== | 47% 190 MB
|=================================== | 47% 190 MB
|=================================== | 47% 190 MB
|=================================== | 47% 191 MB
|==================================== | 48% 191 MB
|==================================== | 48% 192 MB
|==================================== | 48% 192 MB
|==================================== | 48% 193 MB
|==================================== | 48% 193 MB
|==================================== | 48% 193 MB
|==================================== | 48% 194 MB
|==================================== | 48% 194 MB
|==================================== | 48% 194 MB
|==================================== | 48% 195 MB
|==================================== | 49% 195 MB
|==================================== | 49% 196 MB
|==================================== | 49% 196 MB
|===================================== | 49% 196 MB
|===================================== | 49% 197 MB
|===================================== | 49% 197 MB
|===================================== | 49% 198 MB
|===================================== | 49% 198 MB
|===================================== | 49% 199 MB
|===================================== | 49% 199 MB
|===================================== | 50% 199 MB
|===================================== | 50% 200 MB
|===================================== | 50% 200 MB
|===================================== | 50% 201 MB
|===================================== | 50% 201 MB
|===================================== | 50% 201 MB
|====================================== | 50% 202 MB
|====================================== | 50% 202 MB
|====================================== | 50% 203 MB
|====================================== | 50% 203 MB
|====================================== | 51% 203 MB
|====================================== | 51% 204 MB
|====================================== | 51% 204 MB
|====================================== | 51% 204 MB
|====================================== | 51% 205 MB
|====================================== | 51% 205 MB
|====================================== | 51% 206 MB
|====================================== | 51% 206 MB
|====================================== | 51% 206 MB
|====================================== | 51% 207 MB
|======================================= | 52% 207 MB
|======================================= | 52% 208 MB
|======================================= | 52% 208 MB
|======================================= | 52% 208 MB
|======================================= | 52% 209 MB
|======================================= | 52% 209 MB
|======================================= | 52% 210 MB
|======================================= | 52% 210 MB
|======================================= | 52% 210 MB
|======================================= | 52% 211 MB
|======================================= | 53% 211 MB
|======================================= | 53% 212 MB
|======================================= | 53% 212 MB
|======================================== | 53% 212 MB
|======================================== | 53% 213 MB
|======================================== | 53% 213 MB
|======================================== | 53% 213 MB
|======================================== | 53% 214 MB
|======================================== | 53% 214 MB
|======================================== | 53% 215 MB
|======================================== | 54% 215 MB
|======================================== | 54% 216 MB
|======================================== | 54% 216 MB
|======================================== | 54% 216 MB
|======================================== | 54% 217 MB
|======================================== | 54% 217 MB
|======================================== | 54% 217 MB
|========================================= | 54% 218 MB
|========================================= | 54% 218 MB
|========================================= | 54% 219 MB
|========================================= | 55% 219 MB
|========================================= | 55% 219 MB
|========================================= | 55% 220 MB
|========================================= | 55% 220 MB
|========================================= | 55% 221 MB
|========================================= | 55% 221 MB
|========================================= | 55% 221 MB
|========================================= | 55% 222 MB
|========================================= | 55% 222 MB
|========================================= | 55% 223 MB
|========================================= | 55% 223 MB
|========================================== | 56% 223 MB
|========================================== | 56% 224 MB
|========================================== | 56% 224 MB
|========================================== | 56% 224 MB
|========================================== | 56% 225 MB
|========================================== | 56% 225 MB
|========================================== | 56% 226 MB
|========================================== | 56% 226 MB
|========================================== | 56% 226 MB
|========================================== | 56% 227 MB
|========================================== | 57% 227 MB
|========================================== | 57% 228 MB
|========================================== | 57% 228 MB
|=========================================== | 57% 228 MB
|=========================================== | 57% 229 MB
|=========================================== | 57% 229 MB
|=========================================== | 57% 230 MB
|=========================================== | 57% 230 MB
|=========================================== | 57% 230 MB
|=========================================== | 57% 231 MB
|=========================================== | 58% 231 MB
|=========================================== | 58% 232 MB
|=========================================== | 58% 232 MB
|=========================================== | 58% 232 MB
|=========================================== | 58% 233 MB
|=========================================== | 58% 233 MB
|=========================================== | 58% 233 MB
|============================================ | 58% 234 MB
|============================================ | 58% 234 MB
|============================================ | 58% 235 MB
|============================================ | 59% 235 MB
|============================================ | 59% 235 MB
|============================================ | 59% 236 MB
|============================================ | 59% 236 MB
|============================================ | 59% 237 MB
|============================================ | 59% 237 MB
|============================================ | 59% 237 MB
|============================================ | 59% 238 MB
|============================================ | 59% 238 MB
|============================================ | 59% 238 MB
|============================================ | 59% 239 MB
|============================================= | 60% 239 MB
|============================================= | 60% 240 MB
|============================================= | 60% 240 MB
|============================================= | 60% 240 MB
|============================================= | 60% 241 MB
|============================================= | 60% 241 MB
|============================================= | 60% 242 MB
|============================================= | 60% 242 MB
|============================================= | 60% 242 MB
|============================================= | 60% 243 MB
|============================================= | 61% 243 MB
|============================================= | 61% 244 MB
|============================================= | 61% 244 MB
|============================================== | 61% 244 MB
|============================================== | 61% 245 MB
|============================================== | 61% 245 MB
|============================================== | 61% 246 MB
|============================================== | 61% 246 MB
|============================================== | 61% 246 MB
|============================================== | 61% 247 MB
|============================================== | 62% 247 MB
|============================================== | 62% 248 MB
|============================================== | 62% 248 MB
|============================================== | 62% 248 MB
|============================================== | 62% 249 MB
|============================================== | 62% 249 MB
|============================================== | 62% 249 MB
|=============================================== | 62% 250 MB
|=============================================== | 62% 250 MB
|=============================================== | 62% 251 MB
|=============================================== | 63% 251 MB
|=============================================== | 63% 251 MB
|=============================================== | 63% 252 MB
|=============================================== | 63% 252 MB
|=============================================== | 63% 253 MB
|=============================================== | 63% 253 MB
|=============================================== | 63% 253 MB
|=============================================== | 63% 254 MB
|=============================================== | 63% 254 MB
|=============================================== | 63% 255 MB
|================================================ | 64% 255 MB
|================================================ | 64% 255 MB
|================================================ | 64% 256 MB
|================================================ | 64% 256 MB
|================================================ | 64% 256 MB
|================================================ | 64% 257 MB
|================================================ | 64% 257 MB
|================================================ | 64% 258 MB
|================================================ | 64% 258 MB
|================================================ | 64% 258 MB
|================================================ | 64% 259 MB
|================================================ | 65% 259 MB
|================================================ | 65% 260 MB
|================================================ | 65% 260 MB
|================================================= | 65% 260 MB
|================================================= | 65% 261 MB
|================================================= | 65% 261 MB
|================================================= | 65% 262 MB
|================================================= | 65% 262 MB
|================================================= | 65% 262 MB
|================================================= | 65% 263 MB
|================================================= | 66% 263 MB
|================================================= | 66% 264 MB
|================================================= | 66% 264 MB
|================================================= | 66% 264 MB
|================================================= | 66% 265 MB
|================================================= | 66% 265 MB
|================================================== | 66% 265 MB
|================================================== | 66% 266 MB
|================================================== | 66% 266 MB
|================================================== | 66% 267 MB
|================================================== | 67% 267 MB
|================================================== | 67% 267 MB
|================================================== | 67% 268 MB
|================================================== | 67% 268 MB
|================================================== | 67% 269 MB
|================================================== | 67% 269 MB
|================================================== | 67% 269 MB
|================================================== | 67% 270 MB
|================================================== | 67% 270 MB
|================================================== | 67% 271 MB
|=================================================== | 68% 271 MB
|=================================================== | 68% 271 MB
|=================================================== | 68% 272 MB
|=================================================== | 68% 272 MB
|=================================================== | 68% 273 MB
|=================================================== | 68% 273 MB
|=================================================== | 68% 273 MB
|=================================================== | 68% 274 MB
|=================================================== | 68% 274 MB
|=================================================== | 68% 274 MB
|=================================================== | 69% 275 MB
|=================================================== | 69% 275 MB
|=================================================== | 69% 276 MB
|=================================================== | 69% 276 MB
|==================================================== | 69% 277 MB
|==================================================== | 69% 277 MB
|==================================================== | 69% 277 MB
|==================================================== | 69% 278 MB
|==================================================== | 69% 278 MB
|==================================================== | 69% 279 MB
|==================================================== | 70% 279 MB
|==================================================== | 70% 279 MB
|==================================================== | 70% 280 MB
|==================================================== | 70% 280 MB
|==================================================== | 70% 280 MB
|==================================================== | 70% 281 MB
|==================================================== | 70% 281 MB
|===================================================== | 70% 282 MB
|===================================================== | 70% 282 MB
|===================================================== | 70% 282 MB
|===================================================== | 71% 283 MB
|===================================================== | 71% 283 MB
|===================================================== | 71% 284 MB
|===================================================== | 71% 284 MB
|===================================================== | 71% 284 MB
|===================================================== | 71% 285 MB
|===================================================== | 71% 285 MB
|===================================================== | 71% 285 MB
|===================================================== | 71% 286 MB
|===================================================== | 71% 286 MB
|===================================================== | 71% 287 MB
|====================================================== | 72% 287 MB
|====================================================== | 72% 287 MB
|====================================================== | 72% 288 MB
|====================================================== | 72% 288 MB
|====================================================== | 72% 289 MB
|====================================================== | 72% 289 MB
|====================================================== | 72% 289 MB
|====================================================== | 72% 290 MB
|====================================================== | 72% 290 MB
|====================================================== | 72% 290 MB
|====================================================== | 73% 291 MB
|====================================================== | 73% 291 MB
|====================================================== | 73% 292 MB
|====================================================== | 73% 292 MB
|======================================================= | 73% 292 MB
|======================================================= | 73% 293 MB
|======================================================= | 73% 293 MB
|======================================================= | 73% 294 MB
|======================================================= | 73% 294 MB
|======================================================= | 73% 294 MB
|======================================================= | 74% 295 MB
|======================================================= | 74% 295 MB
|======================================================= | 74% 296 MB
|======================================================= | 74% 296 MB
|======================================================= | 74% 296 MB
|======================================================= | 74% 297 MB
|======================================================= | 74% 297 MB
|======================================================== | 74% 298 MB
|======================================================== | 74% 298 MB
|======================================================== | 74% 298 MB
|======================================================== | 74% 299 MB
|======================================================== | 75% 299 MB
|======================================================== | 75% 299 MB
|======================================================== | 75% 300 MB
|======================================================== | 75% 300 MB
|======================================================== | 75% 301 MB
|======================================================== | 75% 301 MB
|======================================================== | 75% 301 MB
|======================================================== | 75% 302 MB
|======================================================== | 75% 302 MB
|======================================================== | 75% 303 MB
|========================================================= | 76% 303 MB
|========================================================= | 76% 303 MB
|========================================================= | 76% 304 MB
|========================================================= | 76% 304 MB
|========================================================= | 76% 304 MB
|========================================================= | 76% 305 MB
|========================================================= | 76% 305 MB
|========================================================= | 76% 306 MB
|========================================================= | 76% 306 MB
|========================================================= | 76% 306 MB
|========================================================= | 77% 307 MB
|========================================================= | 77% 307 MB
|========================================================= | 77% 308 MB
|========================================================= | 77% 308 MB
|========================================================== | 77% 308 MB
|========================================================== | 77% 309 MB
|========================================================== | 77% 309 MB
|========================================================== | 77% 310 MB
|========================================================== | 77% 310 MB
|========================================================== | 77% 310 MB
|========================================================== | 78% 311 MB
|========================================================== | 78% 311 MB
|========================================================== | 78% 312 MB
|========================================================== | 78% 312 MB
|========================================================== | 78% 312 MB
|========================================================== | 78% 313 MB
|========================================================== | 78% 313 MB
|=========================================================== | 78% 313 MB
|=========================================================== | 78% 314 MB
|=========================================================== | 78% 314 MB
|=========================================================== | 78% 315 MB
|=========================================================== | 79% 315 MB
|=========================================================== | 79% 315 MB
|=========================================================== | 79% 316 MB
|=========================================================== | 79% 316 MB
|=========================================================== | 79% 317 MB
|=========================================================== | 79% 317 MB
|=========================================================== | 79% 317 MB
|=========================================================== | 79% 318 MB
|=========================================================== | 79% 318 MB
|=========================================================== | 79% 319 MB
|============================================================ | 80% 319 MB
|============================================================ | 80% 319 MB
|============================================================ | 80% 320 MB
|============================================================ | 80% 320 MB
|============================================================ | 80% 320 MB
|============================================================ | 80% 321 MB
|============================================================ | 80% 321 MB
|============================================================ | 80% 322 MB
|============================================================ | 80% 322 MB
|============================================================ | 80% 322 MB
|============================================================ | 81% 323 MB
|============================================================ | 81% 323 MB
|============================================================ | 81% 324 MB
|============================================================ | 81% 324 MB
|============================================================= | 81% 324 MB
|============================================================= | 81% 325 MB
|============================================================= | 81% 325 MB
|============================================================= | 81% 326 MB
|============================================================= | 81% 326 MB
|============================================================= | 81% 326 MB
|============================================================= | 82% 327 MB
|============================================================= | 82% 327 MB
|============================================================= | 82% 328 MB
|============================================================= | 82% 328 MB
|============================================================= | 82% 328 MB
|============================================================= | 82% 329 MB
|============================================================= | 82% 329 MB
|============================================================== | 82% 330 MB
|============================================================== | 82% 330 MB
|============================================================== | 82% 330 MB
|============================================================== | 83% 331 MB
|============================================================== | 83% 331 MB
|============================================================== | 83% 332 MB
|============================================================== | 83% 332 MB
|============================================================== | 83% 332 MB
|============================================================== | 83% 333 MB
|============================================================== | 83% 333 MB
|============================================================== | 83% 333 MB
|============================================================== | 83% 334 MB
|============================================================== | 83% 334 MB
|============================================================== | 83% 335 MB
|=============================================================== | 84% 335 MB
|=============================================================== | 84% 335 MB
|=============================================================== | 84% 336 MB
|=============================================================== | 84% 336 MB
|=============================================================== | 84% 337 MB
|=============================================================== | 84% 337 MB
|=============================================================== | 84% 337 MB
|=============================================================== | 84% 338 MB
|=============================================================== | 84% 338 MB
|=============================================================== | 84% 339 MB
|=============================================================== | 85% 339 MB
|=============================================================== | 85% 339 MB
|=============================================================== | 85% 340 MB
|================================================================ | 85% 340 MB
|================================================================ | 85% 341 MB
|================================================================ | 85% 341 MB
|================================================================ | 85% 341 MB
|================================================================ | 85% 342 MB
|================================================================ | 85% 342 MB
|================================================================ | 85% 343 MB
|================================================================ | 86% 343 MB
|================================================================ | 86% 343 MB
|================================================================ | 86% 344 MB
|================================================================ | 86% 344 MB
|================================================================ | 86% 345 MB
|================================================================ | 86% 345 MB
|================================================================= | 86% 345 MB
|================================================================= | 86% 346 MB
|================================================================= | 86% 346 MB
|================================================================= | 86% 346 MB
|================================================================= | 87% 347 MB
|================================================================= | 87% 347 MB
|================================================================= | 87% 348 MB
|================================================================= | 87% 348 MB
|================================================================= | 87% 348 MB
|================================================================= | 87% 349 MB
|================================================================= | 87% 349 MB
|================================================================= | 87% 349 MB
|================================================================= | 87% 350 MB
|================================================================= | 87% 350 MB
|================================================================== | 88% 351 MB
|================================================================== | 88% 351 MB
|================================================================== | 88% 351 MB
|================================================================== | 88% 352 MB
|================================================================== | 88% 352 MB
|================================================================== | 88% 352 MB
|================================================================== | 88% 353 MB
|================================================================== | 88% 353 MB
|================================================================== | 88% 354 MB
|================================================================== | 88% 354 MB
|================================================================== | 88% 354 MB
|================================================================== | 89% 355 MB
|================================================================== | 89% 355 MB
|================================================================== | 89% 355 MB
|================================================================== | 89% 356 MB
|=================================================================== | 89% 356 MB
|=================================================================== | 89% 357 MB
|=================================================================== | 89% 357 MB
|=================================================================== | 89% 357 MB
|=================================================================== | 89% 358 MB
|=================================================================== | 89% 358 MB
|=================================================================== | 89% 358 MB
|=================================================================== | 90% 359 MB
|=================================================================== | 90% 359 MB
|=================================================================== | 90% 359 MB
|=================================================================== | 90% 360 MB
|=================================================================== | 90% 360 MB
|=================================================================== | 90% 360 MB
|=================================================================== | 90% 361 MB
|==================================================================== | 90% 361 MB
|==================================================================== | 90% 362 MB
|==================================================================== | 90% 362 MB
|==================================================================== | 90% 362 MB
|==================================================================== | 91% 363 MB
|==================================================================== | 91% 363 MB
|==================================================================== | 91% 364 MB
|==================================================================== | 91% 364 MB
|==================================================================== | 91% 364 MB
|==================================================================== | 91% 365 MB
|==================================================================== | 91% 365 MB
|==================================================================== | 91% 366 MB
|==================================================================== | 91% 366 MB
|==================================================================== | 91% 366 MB
|===================================================================== | 92% 367 MB
|===================================================================== | 92% 367 MB
|===================================================================== | 92% 367 MB
|===================================================================== | 92% 368 MB
|===================================================================== | 92% 368 MB
|===================================================================== | 92% 369 MB
|===================================================================== | 92% 369 MB
|===================================================================== | 92% 369 MB
|===================================================================== | 92% 370 MB
|===================================================================== | 92% 370 MB
|===================================================================== | 92% 370 MB
|===================================================================== | 93% 371 MB
|===================================================================== | 93% 371 MB
|===================================================================== | 93% 372 MB
|====================================================================== | 93% 372 MB
|====================================================================== | 93% 372 MB
|====================================================================== | 93% 373 MB
|====================================================================== | 93% 373 MB
|====================================================================== | 93% 373 MB
|====================================================================== | 93% 374 MB
|====================================================================== | 93% 374 MB
|====================================================================== | 93% 374 MB
|====================================================================== | 94% 375 MB
|====================================================================== | 94% 375 MB
|====================================================================== | 94% 375 MB
|====================================================================== | 94% 376 MB
|====================================================================== | 94% 376 MB
|====================================================================== | 94% 377 MB
|====================================================================== | 94% 377 MB
|======================================================================= | 94% 377 MB
|======================================================================= | 94% 378 MB
|======================================================================= | 94% 378 MB
|======================================================================= | 94% 378 MB
|======================================================================= | 95% 379 MB
|======================================================================= | 95% 379 MB
|======================================================================= | 95% 379 MB
|======================================================================= | 95% 380 MB
|======================================================================= | 95% 380 MB
|======================================================================= | 95% 380 MB
|======================================================================= | 95% 381 MB
|======================================================================= | 95% 381 MB
|======================================================================= | 95% 382 MB
|======================================================================= | 95% 382 MB
|======================================================================= | 95% 382 MB
|======================================================================= | 95% 383 MB
|======================================================================== | 96% 383 MB
|======================================================================== | 96% 383 MB
|======================================================================== | 96% 384 MB
|======================================================================== | 96% 384 MB
|======================================================================== | 96% 384 MB
|======================================================================== | 96% 385 MB
|======================================================================== | 96% 385 MB
|======================================================================== | 96% 385 MB
|======================================================================== | 96% 386 MB
|======================================================================== | 96% 386 MB
|======================================================================== | 96% 386 MB
|======================================================================== | 97% 387 MB
|======================================================================== | 97% 387 MB
|======================================================================== | 97% 387 MB
|======================================================================== | 97% 388 MB
|========================================================================= | 97% 388 MB
|========================================================================= | 97% 388 MB
|========================================================================= | 97% 389 MB
|========================================================================= | 97% 389 MB
|========================================================================= | 97% 389 MB
|========================================================================= | 97% 390 MB
|========================================================================= | 97% 390 MB
|========================================================================= | 97% 390 MB
|========================================================================= | 98% 391 MB
|========================================================================= | 98% 391 MB
|========================================================================= | 98% 391 MB
|========================================================================= | 98% 392 MB
|========================================================================= | 98% 392 MB
|========================================================================= | 98% 392 MB
|========================================================================= | 98% 393 MB
|========================================================================= | 98% 393 MB
|==========================================================================| 98% 393 MB
|==========================================================================| 98% 394 MB
|==========================================================================| 98% 394 MB
|==========================================================================| 98% 394 MB
|==========================================================================| 99% 395 MB
|==========================================================================| 99% 395 MB
|==========================================================================| 99% 395 MB
|==========================================================================| 99% 396 MB
|==========================================================================| 99% 396 MB
|==========================================================================| 99% 396 MB
|==========================================================================| 99% 397 MB
|==========================================================================| 99% 397 MB
|==========================================================================| 99% 397 MB
|==========================================================================| 99% 398 MB
|==========================================================================| 99% 398 MB
|==========================================================================| 99% 398 MB
|===========================================================================| 100% 398 MB
# Display the current contents of snps for inspection.
snps
remove 38LTR
# Drop the 38LTR column so that sample is absent from every later step.
snps <- select(snps, -`38LTR`)
filter to keep snps where there is data from all samples
# Keep only the records in which every sample column (the columns whose
# names contain a digit) holds a non-missing value, then show the result.
snps <- filter(snps, complete.cases(select(snps, matches("[0-9]"))))
snps
# Pull the site-level read depth out of INFO, then keep high-quality
# single-base substitutions whose depth lies strictly between the 5th and
# 95th percentiles of the depth distribution.
snps <- snps %>%
  mutate(
    # Anchor on the start of INFO or a ";" so that only the DP key itself is
    # read, never a longer key that merely ends in "DP".
    TOTAL_DEPTH = as.numeric(str_match(INFO, "(?:^|;)DP=([0-9]+)")[, 2])
  ) %>%
  filter(
    QUAL >= 100,
    # Both alleles must be a single base. A one-character ALT on its own
    # also admits deletions (e.g. REF "AT", ALT "A"), which would then be
    # counted as SNPs.
    nchar(REF) == 1,
    nchar(ALT) == 1,
    # The cut-offs are computed over all rows entering this filter();
    # na.rm keeps quantile() from aborting if a record has no DP entry
    # (such rows are dropped by the comparison itself).
    TOTAL_DEPTH > quantile(TOTAL_DEPTH, 0.05, na.rm = TRUE),
    TOTAL_DEPTH < quantile(TOTAL_DEPTH, 0.95, na.rm = TRUE)
  )
snps
unpack the information for the different samples:
# Split each sample's colon-delimited genotype field into one column per
# sub-field, naming every new column <sample>_<sub-field>.
samples <- str_subset(colnames(snps), "^[0-9]")
for (s in samples) {
  field.names <- paste(
    s,
    c(
      "gt", "tot.depth", "allele.depth", "ref.depth",
      "ref.qual", "alt.depth", "alt.qual", "gt.lik"
    ),
    sep = "_"
  )
  # convert = TRUE lets separate() re-type the new columns where it can.
  snps <- separate(snps, !!s, into = field.names, sep = ":", convert = TRUE)
}
snps
next steps
keep snps that are fixed differences between LTR and LTNR
Count per gene
first, convert to long format
# Reshape the genotype calls to one row per site and plant.
snps.l <- snps %>%
  select(CHROM, POS, ends_with("_gt")) %>%
  gather(key = "plant", value = "gt", ends_with("_gt"))
# Label each plant with the response class found in its column name and
# order the rows by site, class and plant.
snps.l <- snps.l %>%
  mutate(response = str_extract(plant, "(LTR|LTWR)")) %>%
  arrange(CHROM, POS, response, plant)
snps.l
filter to keep homozygous and fixed
# Number of distinct plants present in the long table.
n.samples <- n_distinct(snps.l$plant)
# List the genotype codes that occur.
unique(snps.l[["gt"]])
[1] "0/1" "1/1" "0/0"
# Keep only sites that are fixed differences between the response classes:
# every plant homozygous, a single genotype within each class, and a
# different genotype in LTR than in LTWR.
snps.l <- snps.l %>%
  filter(gt != "0/1") %>% # keep homozygous calls only
  group_by(CHROM, POS, response) %>%
  filter(n_distinct(gt) == 1) %>% # one genotype within each response class
  group_by(CHROM, POS) %>%
  filter(n() == n.samples) %>% # a genotype row from every plant remains
  filter(n_distinct(gt) == 2) %>% # the two response classes differ
  ungroup() %>% # do not carry the per-site grouping into later steps
  arrange(CHROM, POS, response, plant)
snps.l
summarize per gene
# Tally rows per CHROM and divide by n.samples to turn the per-plant row
# count into a per-site count; the identifier column is reported as "contig".
snp.summary <- group_by(snps.l, CHROM)
snp.summary <- summarize(snp.summary, snps = n() / n.samples)
snp.summary <- select(snp.summary, contig = CHROM, snps)
# Show the contigs ordered from most to fewest fixed SNPs, then write the
# same ordering to disk.
arrange(snp.summary, desc(snps))
write_csv(arrange(snp.summary, desc(snps)), "../output/fixed_snps.csv")